{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "import re\n",
    "import random"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 将混合在一起的wav文件放入各个spk目录下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "SLR85_dir = \"/home1/meichaoyang/Dataset/feats/SLR85/hifi_16k/train_music\"\n",
    "SLR_wav_list=list(Path(SLR85_dir).rglob(\"*.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "for path_item in SLR_wav_list:\n",
    "    wav_path = str(path_item)\n",
    "    spk = os.path.basename(wav_path).split(\"_\")[0]\n",
    "    spk_dir = os.path.dirname(wav_path)+\"/\"+spk\n",
    "    if not os.path.isdir(spk_dir):\n",
    "        os.mkdir(spk_dir)\n",
    "    shutil.move(wav_path,spk_dir + \"/\" +os.path.basename(wav_path))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 读取原始标注到corp_map中"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 处理train数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "slr_train_dir = \"/home1/meichaoyang/Dataset/SLR85/train\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "slr_train_list=list(Path(slr_train_dir).rglob(\"*.wav\"))\n",
    "slr_train_list.sort(key=lambda Path: os.path.basename(os.path.splitext((str(Path)))[0]))\n",
    "slr_scp_train = {}\n",
    "for path_item in slr_train_list:\n",
    "    wav_path = str(path_item)\n",
    "    slr_scp_train[os.path.basename(os.path.splitext((wav_path))[0])] = wav_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# slr_scp_train\n",
    "\"_2_\" in \"SV0001_2_02_F0663\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 生成wav.scp、trans.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/home1/meichaoyang/Dataset/feats/SLR85/hifi/train/trans.txt', 'w') as f1:\n",
    "    with open('/home1/meichaoyang/Dataset/feats/SLR85/hifi/train/wav.scp', 'w') as f2:\n",
    "        for i in sorted(slr_scp_train):\n",
    "            if \"_7_\" in i:\n",
    "                if i.split(\"_\")[-1][0] == \"S\":\n",
    "                    f1.write(i+'\\t'+\"你好SIL米雅\"+'\\n')\n",
    "                    f2.write(i+'\\t'+slr_scp_train[i]+'\\n')\n",
    "                else:\n",
    "                    f1.write(i+'\\t'+\"你好米雅\"+'\\n')\n",
    "                    f2.write(i+'\\t'+slr_scp_train[i]+'\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 处理dev数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "slr_dev_dir = \"/home1/meichaoyang/Dataset/feats/SLR85/hifi/dev\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "slr_dev_list=list(Path(slr_dev_dir).rglob(\"*.wav\"))\n",
    "slr_dev_list.sort(key=lambda Path: os.path.basename(os.path.splitext((str(Path)))[0]))\n",
    "slr_scp_dev = {}\n",
    "for path_item in slr_dev_list:\n",
    "    wav_path = str(path_item)\n",
    "    slr_scp_dev[os.path.basename(os.path.splitext((wav_path))[0])] = wav_path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 生成wav.scp、trans.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/home1/meichaoyang/Dataset/feats/SLR85/hifi/dev/trans.txt', 'w') as f1:\n",
    "    with open('/home1/meichaoyang/Dataset/feats/SLR85/hifi/dev/wav.scp', 'w') as f2:\n",
    "        for i in sorted(slr_scp_dev):\n",
    "            if \"_7_\" in i:\n",
    "                if i.split(\"_\")[-1][0] == \"S\":\n",
    "                    f1.write(i+'\\t'+\"你好SIL米雅\"+'\\n')\n",
    "                    f2.write(i+'\\t'+slr_scp_dev[i]+'\\n')\n",
    "                else:\n",
    "                    f1.write(i+'\\t'+\"你好米雅\"+'\\n')\n",
    "                    f2.write(i+'\\t'+slr_scp_dev[i]+'\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 处理test数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "slr_test_dir = \"/home1/meichaoyang/Dataset/feats/SLR85/hifi/test\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "slr_test_list=list(Path(slr_test_dir).rglob(\"*.wav\"))\n",
    "slr_test_list.sort(key=lambda Path: os.path.basename(os.path.splitext((str(Path)))[0]))\n",
    "slr_scp_test = {}\n",
    "for path_item in slr_dev_list:\n",
    "    wav_path = str(path_item)\n",
    "    slr_scp_test[os.path.basename(os.path.splitext((wav_path))[0])] = wav_path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 生成wav.scp、trans.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/home1/meichaoyang/Dataset/feats/SLR85/hifi/test/trans.txt', 'w') as f1:\n",
    "    with open('/home1/meichaoyang/Dataset/feats/SLR85/hifi/test/wav.scp', 'w') as f2:\n",
    "        for i in sorted(slr_scp_test):\n",
    "            if \"_7_\" in i:\n",
    "                if i.split(\"_\")[-1][0] == \"S\":\n",
    "                    f1.write(i+'\\t'+\"你好SIL米雅\"+'\\n')\n",
    "                    f2.write(i+'\\t'+slr_scp_test[i]+'\\n')\n",
    "                else:\n",
    "                    f1.write(i+'\\t'+\"你好米雅\"+'\\n')\n",
    "                    f2.write(i+'\\t'+slr_scp_test[i]+'\\n')\n",
    "# with open('dev/wav.scp', 'w') as f:\n",
    "#     for i in sorted(slr_scp_test):\n",
    "#         f.write(i+'\\t'+slr_scp_test[i]+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
